#!/bin/bash
#
# CP2K (GNU aarch64) arch file for Linux clusters
#
# Tested with: GNU 12.3.0, MPICH 4.0.3, 
#              ScaLAPACK 2.2.1, OpenBLAS 0.3.27,
#              FFTW 3.3.10, HDF5 1.14.2,
#              LIBINT 2.6.0, LIBXC 6.2.2,
#              ELPA 2024.03.001, PLUMED 2.9.0,
#              SPGLIB 2.3.1, LIBVORI 220621,
#              GSL 2.7, COSMA 2.6.6, SIRIUS 7.5.2,
#              LIBGRPP 20231215, SPFFT 1.0.6, SPLA 1.6.0
#
# Usage: Source this arch file and then run make as instructed.
#        A full toolchain installation is performed as default.
#
# Last update: 12.06.2024
#
# \
   if [[ "${0}" == "${BASH_SOURCE}" ]]; then \
      echo "ERROR: Script ${0##*/} must be sourced"; \
      echo "Usage: source ${0##*/}"; \
      exit 1; \
   fi; \
   this_file=${BASH_SOURCE##*/}; \
   cd tools/toolchain; \
   rm -rf build; \
   [[ -z "${mpi_implementation}" ]] && mpi_implementation="mpich"; \
   [[ -z "${target_cpu}" ]] && target_cpu="native"; \
   ./install_cp2k_toolchain.sh --install-all -j${maxtasks} --no-arch-files --target-cpu=${target_cpu} --with-gcc --with-${mpi_implementation} --with-libxsmm=no; \
   source ./install/setup; \
   cd ../..; \
   echo; \
   echo "Check the output above for error messages and consistency!"; \
   echo; \
   echo "If everything is OK, you can build a CP2K production binary with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.}"; \
   echo; \
   echo "Alternatively, you can add further checks, e.g. for regression testing, with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} DO_CHECKS=yes"; \
   echo "or build CP2K only with shared libraries using"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} SHARED=yes"; \
   echo "or build CP2K as a library with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} libcp2k"; \
   echo; \
   echo "Run always the following command before using the CP2K binary"; \
   echo "   source ${PWD}/tools/toolchain/install/setup"; \
   echo; \
   return

# Set options
DO_CHECKS      := no
SHARED         := no
TARGET_CPU     := native
USE_COSMA      := 2.6.6
USE_ELPA       := 2024.03.001
USE_FFTW       := 3.3.10
USE_HDF5       := 1.14.2
USE_LIBINT     := 2.6.0
USE_LIBPEXSI   := 1.2.0
USE_LIBVORI    := 220621
USE_LIBXC      := 6.2.2
#USE_LIBXSMM    := 1.17
USE_OPENBLAS   := 0.3.27
USE_PLUMED     := 2.9.0
USE_QUIP       := 0.9.10
USE_SCALAPACK  := 2.2.1
USE_SIRIUS     := 7.5.2
USE_SPFFT      := 1.0.6
USE_SPGLIB     := 2.3.1
USE_SPLA       := 1.6.0
# Only needed for SIRIUS
LIBVDWXC_VER   := 0.4.0
# Only needed for LIBPEXSI
SCOTCH_VER     := 6.0.0
SUPERLU_VER    := 6.1.0

LMAX           := 5
MAX_CONTR      := 4

CC             := mpicc
FC             := mpifort
LD             := mpifort
AR             := ar -r

CFLAGS         := -O2 -fopenmp -fopenmp-simd -ftree-vectorize -funroll-loops -g -mtune=$(TARGET_CPU)

DFLAGS         := -D__parallel
DFLAGS         += -D__SCALAPACK
DFLAGS         += -D__HAS_IEEE_EXCEPTIONS
DFLAGS         += -D__MAX_CONTR=$(strip $(MAX_CONTR))

INSTALL_PATH   := $(PWD)/tools/toolchain/install

ifeq ($(SHARED), yes)
   LD_SHARED      := $(FC) -shared
   CFLAGS         += -fPIC
   LDFLAGS        := -Wl,--enable-new-dtags
   CP2K_LIB       := $(PWD)/lib/$(ARCH)/$(ONEVERSION)
   LDFLAGS        += -Wl,-rpath=$(CP2K_LIB)
   LDFLAGS        += -Wl,-rpath=$(CP2K_LIB)/exts/dbcsr
endif

# Settings for regression testing
ifeq ($(DO_CHECKS), yes)
   DFLAGS         += -D__CHECK_DIAG
#  CFLAGS_DEBUG   := -fsanitize=address
   CFLAGS_DEBUG   := -fsanitize=leak
   FCFLAGS_DEBUG  := -fcheck=bounds,do,recursion,pointer
   FCFLAGS_DEBUG  += -fcheck=all,no-array-temps
   FCFLAGS_DEBUG  += -ffpe-trap=invalid,overflow,zero
   FCFLAGS_DEBUG  += -fimplicit-none
   FCFLAGS_DEBUG  += -finit-derived
   FCFLAGS_DEBUG  += -finit-real=snan
   FCFLAGS_DEBUG  += -finit-integer=-42
   FCFLAGS_DEBUG  += -finline-matmul-limit=0
   WFLAGS         := -Werror=aliasing
   WFLAGS         += -Werror=ampersand
   WFLAGS         += -Werror=c-binding-type
   WFLAGS         += -Werror=conversion
   WFLAGS         += -Werror=intrinsic-shadow
   WFLAGS         += -Werror=intrinsics-std
   WFLAGS         += -Werror=line-truncation
   WFLAGS         += -Wrealloc-lhs
   WFLAGS         += -Werror=tabs
   WFLAGS         += -Werror=target-lifetime
   WFLAGS         += -Werror=underflow
   WFLAGS         += -Werror=unused-but-set-variable
   WFLAGS         += -Werror=unused-dummy-argument
   WFLAGS         += -Werror=unused-variable
endif

ifneq ($(USE_PLUMED),)
   USE_PLUMED     := $(strip $(USE_PLUMED))
   PLUMED_LIB     := $(INSTALL_PATH)/plumed-$(USE_PLUMED)/lib
   DFLAGS         += -D__PLUMED2
   USE_GSL        := 2.7
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(PLUMED_LIB) -L$(PLUMED_LIB) -lplumed -lplumedKernel
   else
      LIBS           += $(PLUMED_LIB)/libplumed.a
   endif
endif

ifneq ($(USE_ELPA),)
   USE_ELPA       := $(strip $(USE_ELPA))
   ELPA_INC       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/cpu/include/elpa_openmp-$(USE_ELPA)
   ELPA_LIB       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/cpu/lib
   CFLAGS         += -I$(ELPA_INC)/elpa -I$(ELPA_INC)/modules
   DFLAGS         += -D__ELPA
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(ELPA_LIB) -L$(ELPA_LIB) -lelpa_openmp
   else
      LIBS           += $(ELPA_LIB)/libelpa_openmp.a
   endif
endif

ifneq ($(USE_QUIP),)
   USE_QUIP       := $(strip $(USE_QUIP))
   QUIP_INC       := $(INSTALL_PATH)/quip-$(USE_QUIP)/include
   QUIP_LIB       := $(INSTALL_PATH)/quip-$(USE_QUIP)/lib
   CFLAGS         += -I$(QUIP_INC)
   DFLAGS         += -D__QUIP
   LIBS           += $(QUIP_LIB)/libquip_core.a
   LIBS           += $(QUIP_LIB)/libatoms.a
   LIBS           += $(QUIP_LIB)/libFoX_sax.a
   LIBS           += $(QUIP_LIB)/libFoX_common.a
   LIBS           += $(QUIP_LIB)/libFoX_utils.a
   LIBS           += $(QUIP_LIB)/libFoX_fsys.a
endif

ifneq ($(USE_LIBPEXSI),)
   USE_LIBPEXSI   := $(strip $(USE_LIBPEXSI))
   SCOTCH_VER     := $(strip $(SCOTCH_VER))
   SUPERLU_VER    := $(strip $(SUPERLU_VER))
   LIBPEXSI_INC   := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/include
   LIBPEXSI_LIB   := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/lib
   SCOTCH_INC     := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/include
   SCOTCH_LIB     := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/lib
   SUPERLU_INC    := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/include
   SUPERLU_LIB    := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/lib
   CFLAGS         += -I$(LIBPEXSI_INC) -I$(SCOTCH_INC) -I$(SUPERLU_INC)
   DFLAGS         += -D__LIBPEXSI
   LIBS           += $(LIBPEXSI_LIB)/libpexsi.a
   LIBS           += $(SUPERLU_LIB)/libsuperlu_dist.a
   LIBS           += $(SCOTCH_LIB)/libptscotchparmetis.a
   LIBS           += $(SCOTCH_LIB)/libptscotch.a
   LIBS           += $(SCOTCH_LIB)/libptscotcherr.a
   LIBS           += $(SCOTCH_LIB)/libscotchmetis.a
   LIBS           += $(SCOTCH_LIB)/libscotch.a
endif

ifneq ($(USE_LIBVORI),)
   USE_LIBVORI    := $(strip $(USE_LIBVORI))
   LIBVORI_LIB    := $(INSTALL_PATH)/libvori-$(USE_LIBVORI)/lib
   DFLAGS         += -D__LIBVORI
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(LIBVORI_LIB) -L$(LIBVORI_LIB) -lvori
   else
      LIBS           += $(LIBVORI_LIB)/libvori.a
   endif
endif

ifneq ($(USE_LIBXC),)
   USE_LIBXC      := $(strip $(USE_LIBXC))
   LIBXC_INC      := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/include
   LIBXC_LIB      := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/lib
   CFLAGS         += -I$(LIBXC_INC)
   DFLAGS         += -D__LIBXC
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(LIBXC_LIB) -L$(LIBXC_LIB) -lxcf03 -lxc
   else
      LIBS           += $(LIBXC_LIB)/libxcf03.a
      LIBS           += $(LIBXC_LIB)/libxc.a
   endif
endif

ifneq ($(USE_LIBINT),)
   USE_LIBINT     := $(strip $(USE_LIBINT))
   LMAX           := $(strip $(LMAX))
   LIBINT_INC     := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/include
   LIBINT_LIB     := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/lib
   CFLAGS         += -I$(LIBINT_INC)
   DFLAGS         += -D__LIBINT
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(LIBINT_LIB) -L$(LIBINT_LIB) -lint2
   else
      LIBS           += $(LIBINT_LIB)/libint2.a
      LIBS           += $(LIBINT_LIB)/libint2.a
   endif
endif

ifneq ($(USE_SPGLIB),)
   USE_SPGLIB     := $(strip $(USE_SPGLIB))
   SPGLIB_INC     := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/include
   SPGLIB_LIB     := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/lib
   CFLAGS         += -I$(SPGLIB_INC)
   DFLAGS         += -D__SPGLIB
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(SPGLIB_LIB) -L$(SPGLIB_LIB) -lsymspg
   else
      LIBS           += $(SPGLIB_LIB)/libsymspg.a
   endif
endif

ifneq ($(USE_LIBXSMM),)
   USE_LIBXSMM    := $(strip $(USE_LIBXSMM))
   LIBXSMM_INC    := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/include
   LIBXSMM_LIB    := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/lib
   CFLAGS         += -I$(LIBXSMM_INC)
   DFLAGS         += -D__LIBXSMM
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(LIBXSMM_LIB) -L$(LIBXSMM_LIB) -lxsmmf -lxsmm
   else
      LIBS           += $(LIBXSMM_LIB)/libxsmmf.a
      LIBS           += $(LIBXSMM_LIB)/libxsmm.a
   endif
endif

ifneq ($(USE_SIRIUS),)
   USE_SIRIUS     := $(strip $(USE_SIRIUS))
   LIBVDWXC_VER   := $(strip $(LIBVDWXC_VER))
   LIBVDWXC_INC   := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/include
   LIBVDWXC_LIB   := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/lib
   SIRIUS_INC     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include
   SIRIUS_LIB     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib
   CFLAGS         += -I$(LIBVDWXC_INC)
   CFLAGS         += -I$(SIRIUS_INC)
   DFLAGS         += -D__LIBVDWXC
   DFLAGS         += -D__SIRIUS
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(SIRIUS_LIB) -L$(SIRIUS_LIB) -lsirius
      LIBS           += -Wl,-rpath=$(LIBVDWXC_LIB) -L$(LIBVDWXC_LIB) -lvdwxc
   else
      LIBS           += $(SIRIUS_LIB)/libsirius.a
      LIBS           += $(LIBVDWXC_LIB)/libvdwxc.a
   endif
endif

ifneq ($(USE_SPFFT),)
   USE_SPFFT      := $(strip $(USE_SPFFT))
   SPFFT_INC      := $(INSTALL_PATH)/SpFFT-$(USE_SPFFT)/include
   SPFFT_LIB      := $(INSTALL_PATH)/SpFFT-$(USE_SPFFT)/lib
   CFLAGS         += -I$(SPFFT_INC)
   DFLAGS         += -D__SPFFT
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(SPFFT_LIB) -L$(SPFFT_LIB) -lspfft
   else
      LIBS           += $(SPFFT_LIB)/libspfft.a
   endif
endif

ifneq ($(USE_SPLA),)
   USE_SPLA       := $(strip $(USE_SPLA))
   SPLA_INC       := $(INSTALL_PATH)/SpLA-$(USE_SPLA)/include/spla
   SPLA_LIB       := $(INSTALL_PATH)/SpLA-$(USE_SPLA)/lib
   CFLAGS         += -I$(SPLA_INC)
   DFLAGS         += -D__SPLA
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(SPLA_LIB) -L$(SPLA_LIB) -lspla
   else
      LIBS           += $(SPLA_LIB)/libspla.a
   endif
endif

ifneq ($(USE_HDF5),)
   USE_HDF5       := $(strip $(USE_HDF5))
   HDF5_INC       := $(INSTALL_PATH)/hdf5-$(USE_HDF5)/include
   HDF5_LIB       := $(INSTALL_PATH)/hdf5-$(USE_HDF5)/lib
   CFLAGS         += -I$(HDF5_INC)
   DFLAGS         += -D__HDF5
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath,$(HDF5_LIB) -L$(HDF5_LIB) -lhdf5_fortran -lhdf5
   else
      LIBS           += $(HDF5_LIB)/libhdf5_fortran.a
      LIBS           += $(HDF5_LIB)/libhdf5_f90cstub.a
      LIBS           += $(HDF5_LIB)/libhdf5.a
   endif
   LIBS           += -lsz
endif

ifneq ($(USE_COSMA),)
   USE_COSMA      := $(strip $(USE_COSMA))
   COSMA_INC      := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/include
   COSMA_LIB      := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/lib
   CFLAGS         += -I$(COSMA_INC)
   DFLAGS         += -D__COSMA
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(COSMA_LIB) -L$(COSMA_LIB) -lcosma_prefixed_pxgemm -lcosma -lcosta
   else
      LIBS           += $(COSMA_LIB)/libcosma_prefixed_pxgemm.a
      LIBS           += $(COSMA_LIB)/libcosma.a
      LIBS           += $(COSMA_LIB)/libcosta.a
   endif
endif

ifneq ($(USE_FFTW),)
   USE_FFTW       := $(strip $(USE_FFTW))
   FFTW_INC       := $(INSTALL_PATH)/fftw-$(USE_FFTW)/include
   FFTW_LIB       := $(INSTALL_PATH)/fftw-$(USE_FFTW)/lib
   CFLAGS         += -I$(FFTW_INC)
   DFLAGS         += -D__FFTW3
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(FFTW_LIB) -L$(FFTW_LIB) -lfftw3_mpi -lfftw3_omp -lfftw3
   else
      LIBS           += $(FFTW_LIB)/libfftw3_mpi.a
      LIBS           += $(FFTW_LIB)/libfftw3_omp.a
      LIBS           += $(FFTW_LIB)/libfftw3.a
   endif
endif

ifneq ($(USE_SCALAPACK),)
   SCALAPACK_LIB  := $(INSTALL_PATH)/scalapack-$(USE_SCALAPACK)/lib
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(SCALAPACK_LIB) -L$(SCALAPACK_LIB) -lscalapack
   else
      LIBS           += $(SCALAPACK_LIB)/libscalapack.a
   endif
endif

ifneq ($(USE_GSL),)
   USE_GSL        := $(strip $(USE_GSL))
   GSL_INC        := $(INSTALL_PATH)/gsl-$(USE_GSL)/include
   GSL_LIB        := $(INSTALL_PATH)/gsl-$(USE_GSL)/lib
   CFLAGS         += -I$(GSL_INC)
   DFLAGS         += -D__GSL
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(GSL_LIB) -L$(GSL_LIB) -lgsl
   else
      LIBS           += $(GSL_LIB)/libgsl.a
   endif
endif

ifneq ($(USE_OPENBLAS),)
   USE_OPENBLAS   := $(strip $(USE_OPENBLAS))
   OPENBLAS_INC   := $(INSTALL_PATH)/openblas-$(USE_OPENBLAS)/include
   OPENBLAS_LIB   := $(INSTALL_PATH)/openblas-$(USE_OPENBLAS)/lib
   CFLAGS         += -I$(OPENBLAS_INC)
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(OPENBLAS_LIB) -L$(OPENBLAS_LIB) -lopenblas
   else
      LIBS           += $(OPENBLAS_LIB)/libopenblas.a
   endif
endif

CFLAGS         += $(DFLAGS) $(CFLAGS_DEBUG)

FCFLAGS        := $(CFLAGS) $(FCFLAGS_DEBUG) $(WFLAGS)
ifeq ($(shell [ $(shell gcc -dumpversion | cut -d. -f1) -gt 9 ] && echo yes), yes)
   FCFLAGS        += -fallow-argument-mismatch
endif
FCFLAGS        += -fbacktrace
FCFLAGS        += -ffree-form
FCFLAGS        += -ffree-line-length-none
FCFLAGS        += -fno-omit-frame-pointer
FCFLAGS        += -std=f2008

LDFLAGS        += $(FCFLAGS)

LIBS           += -lz -ldl -lstdc++

# End
